---
title: "Analysis and Results: TFA and Voter Participation"
output: pdf_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}

# Start from an empty workspace and release freed memory
rm(list = ls())
gc()

# Attach pacman, unload previously attached add-on packages, then load
# everything this document uses (load order kept as-is because it affects masking)
library(pacman)
pacman::p_unload(all)
pacman::p_load(
  magrittr, tidyverse,
  data.table,
  rdrobust,
  rmarkdown,
  here, gridExtra, lubridate,
  estimatr, modelsummary
)

# Chunk options applied to every chunk in the document
knitr::opts_chunk$set(
  fig.align = "center",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  echo = FALSE
)

# Anchor the project root on this file and work from there
i_am("116_analysis.Rmd")
setwd(here())

# Helper functions for opening the cleaned data files (defines open_data(),
# presumably -- it is called throughout but not defined in this document)
source(here("Functions", "functions_open_clean_data_for_analysis.R"))

# Default number of bins
nbins <- 30

```

```{r rdfun, include=F}
### -------------------------------------------------- 
### ---- PLOT FUNCTION ---- 
### -------------------------------------------------- 


# Draw a binned regression-discontinuity plot for one outcome and annotate it
# with the fuzzy (CACE) and sharp (ITT) rdrobust estimates.
#
# Arguments:
#   dat    data frame containing the outcome, running variable, take-up
#          indicator and (optionally) weight columns.
#   dv     name (string) of the outcome column.
#   z      name (string) of the running-variable column; the cutoff is 0.
#   mat4   name (string) of the column passed to rdrobust's `fuzzy` argument.
#   w      name (string) of a weights column, or NULL for no weights.
#   t, d   second and first line of the plot title, respectively.
#   nbins  not used; kept so existing calls continue to work.
#   v      "1x2" puts each standard error on its own line in the annotation,
#          "2x2" keeps it on the same line as the estimate.
#   y_lim  y-axis limits; also used to place the breaks and the annotation.
#
# Returns the ggplot object. As a side effect, writes the conventional
# (non-bias-corrected) estimates to the console for reference.
plot_rd <- function(dat, dv, z="zscore", mat4="matriculated4", w=NULL, t="", d="", 
                    nbins=NULL, v=c("1x2", "2x2"), y_lim=c(0.0,0.7))
{
  # Collapse the layout flag to one value; testing the length-2 default
  # directly inside if() is an error in current versions of R
  v <- match.arg(v)

  # Keep only the needed columns and drop incomplete rows (non-elig. obs).
  # all_of() makes the strings refer to column names even when the data
  # happen to contain columns called "dv", "z", "w" or "mat4".
  dat <- dat %>% select(all_of(c(dv, z, w, mat4))) %>% drop_na()

  # Fixed-name copies of the outcome and running variable for plotting, so
  # the figure honours `z` instead of assuming a column called "zscore"
  dat$rd_y <- c(dat[[dv]])
  dat$rd_x <- dat[[z]]

  # Weight vector taken after the row filter so it lines up with y and x
  wts <- if (is.null(w)) NULL else dat[[w]]

  # Fuzzy RD (CACE) and sharp RD (ITT)
  resCACE <- rdrobust(y=dat[[dv]], x=dat[[z]], fuzzy=dat[[mat4]], weights=wts, all=TRUE)
  resITT <- rdrobust(y=dat[[dv]], x=dat[[z]], weights=wts, all=TRUE)

  # Bias-corrected estimates with robust standard errors (reported in paper)
  rnd <- 3 #digits to round
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(rnd)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(rnd)
  bITT  <- resITT$Estimate[1,"tau.bc"] %>% round(rnd)
  seITT <- resITT$Estimate[1,"se.rb"] %>% round(rnd)

  # Conventional estimates, shown for reference only
  cace_con  <- resCACE$Estimate[1,"tau.us"] %>% round(rnd)
  cace_con_se  <- resCACE$Estimate[1,"se.us"] %>% round(rnd)
  itt_con <- resITT$Estimate[1,"tau.us"] %>% round(rnd)
  itt_con_se <- resITT$Estimate[1,"se.us"] %>% round(rnd)

  # Annotation text for the figure
  if (v == "1x2") {
    estText <- paste0("CACE: ",bCACE, " \n (",seCACE,")",
                      "\n  ITT: ",bITT," \n (",seITT,")")
  } else {
    estText <- paste0("CACE: ",bCACE, "  (",seCACE,")",
                      "\n  ITT: ",bITT,"  (",seITT,")")
  }

  # cat() rather than print() so the embedded newline is rendered as a line
  # break instead of a literal "\n"
  cat(paste0("Conventional CACE: ", cace_con, " (", cace_con_se, ") \n",
             "Conventional ITT: ", itt_con, " (", itt_con_se, ")\n"))

  # Observations on either side of the cutoff, fitted separately
  below <- dat[dat$rd_x < 0, ]
  above <- dat[dat$rd_x > 0, ]

  # Binned means (bin width .025) sized by bin count, plus quartic fits
  # NOTE(review): geom_smooth() is given a formula but no `method`, so ggplot2
  # chooses the smoother itself; confirm whether method = "lm" was intended.
  dat %>%
    mutate(bin = cut_width(rd_x, width=.025, boundary=-6)) %>%
    group_by(bin) %>%
    summarize(n=n(),
              mn=mean(rd_y, na.rm=TRUE),
              z=min(rd_x)) %>%
    ggplot() +
    geom_point(aes(x=z, y=mn, size=n), color = "#756bb1", fill = alpha("#bcbddc", 0.4), shape = 21, stroke = 1.25) +
    geom_smooth(data=below, aes(x=rd_x, y=rd_y), formula = y ~ poly(x, 4), se=TRUE) +
    geom_smooth(data=above, aes(x=rd_x, y=rd_y), formula = y ~ poly(x, 4), se=TRUE) +
    theme_bw() + coord_cartesian(ylim=y_lim, xlim=c(-1,1)) +
    annotate("text", x=0, y=max(y_lim)-.1, label=estText, hjust=1) +
    scale_y_continuous(breaks=c(0.0,(.5*max(y_lim)),(max(y_lim))),
                       labels=c("0%", paste0((.5*100*max(y_lim)), "%"), paste0((100*max(y_lim)), "%"))) +
    ggtitle(paste(d, "\n", t)) +
    xlab("TFA Score Centered at 0") + ylab("Outcome") +
    geom_vline(xintercept=0, linetype=2, color="skyblue") +
    scale_size_continuous(range=c(.2,3))

}

# Estimate the fuzzy (CACE) and sharp (ITT) regression discontinuity for one
# outcome and return the rounded estimates.
#
# Arguments:
#   dat    data frame containing the outcome, running variable, take-up
#          indicator and (optionally) weight columns.
#   dv     name (string) of the outcome column.
#   z      name (string) of the running-variable column.
#   mat4   name (string) of the column passed to rdrobust's `fuzzy` argument.
#   w      name (string) of a weights column, or NULL for no weights.
#   t, d, nbins, v, y_lim
#          not used here; accepted so calls can mirror plot_rd().
#
# Returns a two-row data frame with columns est, se and model ("CACE", "ITT"),
# holding the bias-corrected estimate and robust standard error of each model.
calc_rd <- function(dat, dv, z="zscore", mat4="matriculated4", w=NULL, t="", d="", 
                    nbins=NULL, v=c("1x2", "2x2"), y_lim=c(0.0,0.7))
{
  # Keep only the needed columns and drop incomplete rows (non-elig. obs).
  # all_of() makes the strings refer to column names even when the data
  # happen to contain columns called "dv", "z", "w" or "mat4".
  dat <- dat %>% select(all_of(c(dv, z, w, mat4))) %>% drop_na()

  # Weight vector taken after the row filter so it lines up with y and x
  wts <- if (is.null(w)) NULL else dat[[w]]

  # Fuzzy RD (CACE) and sharp RD (ITT)
  resCACE <- rdrobust(y=dat[[dv]], x=dat[[z]], fuzzy=dat[[mat4]], weights=wts, all=TRUE)
  resITT <- rdrobust(y=dat[[dv]], x=dat[[z]], weights=wts, all=TRUE)

  # Bias-corrected estimates with robust standard errors
  rnd <- 3 #digits to round
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(rnd)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(rnd)
  bITT  <- resITT$Estimate[1,"tau.bc"] %>% round(rnd)
  seITT <- resITT$Estimate[1,"se.rb"] %>% round(rnd)

  as.data.frame(list("est"=c(bCACE, bITT),
                     "se"=c(seCACE, seITT),
                     "model"=c("CACE", "ITT")))

}
```

```{r rdfuns_supp, include=FALSE, warning=FALSE, message=FALSE}
# Build the outcome column name used by plot_rd_1x2(): the outcome type and
# period joined by "_", followed by the election type and the election range
# whenever those are not the empty string.
gen_dv <- function(per=c("preAPP", "postAPP", "post1Y", "postTFA"), 
                   dv_type, elec_type, elec_range)
{
  name <- paste(dv_type, per, sep = "_")
  if (elec_type != "") {
    name <- paste(name, elec_type, sep = "_")
  }
  if (elec_range != "") {
    name <- paste(name, elec_range, sep = "_")
  }
  return(name)
}



# Produce the two post-period RD plots (after application, after TFA
# completion) for one outcome family.
#
# Arguments:
#   dat         data frame with an `appyear` column plus the outcome columns
#               named by gen_dv().
#   dv_type, elec_type, elec_range
#               pieces of the outcome name; when left at their defaults the
#               first listed option is used.
#   z, mat4, w  forwarded to plot_rd().
#   t, d        not used; titles are fixed per panel and taken from dedup_name.
#   nbins       forwarded to plot_rd().
#   ay_range    application years to keep.
#   dedup_name  first title line of each panel.
#
# The first panel is printed explicitly (a ggplot created inside a function
# is otherwise discarded); the second panel is returned, as before, so it is
# displayed when the call is made at top level. A panel whose estimation
# fails is reported by try() and skipped.
plot_rd_1x2 <- function(dat, dv_type=c("ever_vote","prop_vote"),
                        elec_type=c("","pres","mid"), 
                        elec_range=c("","08to5","10to5","12to5"),
                        z="zscore", mat4="matriculated4", 
                        w=NULL, t="", d="", nbins=50,
                        ay_range=c(2007:2015),  
                        dedup_name="Survey State")
{
  # Reduce the option vectors to a single choice; gen_dv() tests them inside
  # if(), which requires length-one values. (match.arg() is not used because
  # it cannot match the empty-string option.)
  dv_type <- dv_type[[1]]
  elec_type <- elec_type[[1]]
  elec_range <- elec_range[[1]]

  cohort <- filter(dat, appyear %in% ay_range)

  panel_b <- try(plot_rd(cohort, nbins=nbins, z=z, mat4=mat4, w=w,
                         dv=gen_dv(per="postAPP", dv_type, elec_type, elec_range),
                         t="(b) After Application", d=dedup_name, v="1x2"))
  if (!inherits(panel_b, "try-error")) print(panel_b)

  try(plot_rd(cohort, nbins=nbins, z=z, mat4=mat4, w=w,
              dv=gen_dv(per="postTFA", dv_type, elec_type, elec_range),
              t="(c) After TFA Completion", d=dedup_name, v="1x2"))
}
```

This document produces the results and figures reported in the main text and appendix of "Civilian National Service Programs Can Powerfully Increase Youth Voter Turnout." Compile the file to produce figures in the /116_analysis-files folder.

## Match Strategy 1: Survey Variables 

1) Use survey-based (not application) birth year to find matches within states; use only matches from survey-based states for the post-treatment results; use application-based state for pre-treatment results, but always restrict to the sample of survey respondents to ensure results are comparable across models. This is Match Strategy 1 in the paper, reported in Figure 2 (though see below for the code to generate the coefficient plot in Figure 1) and Section A7 of the appendix.

```{r load1}
# Work inside the survey-only match folder
setwd(here("Survey_Only"))

# Flag dataset for the survey-only match
load(here("Survey_Only", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))

# Open the post-treatment (dedup 6) and pre-treatment (dedup 10) deduplication
# variants with the custom loader and store each as a plain data frame
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # survey state
dat_hybrid <- open_data(match=1, dedup=10, csv=TRUE,
                        rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # hybrid of 3 application sources
```

```{r s1_p1, fig.dim=c(5,5)}
# Match 1 post-treatment RD figure, 2007-2015 application cohorts
dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="ever_vote_postTFA_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

# Full rdrobust output for reference: fuzzy RD first, then sharp RD
# NOTE(review): these calls use every row of dat_st, while the figure is
# restricted to appyear 2007-2015 -- confirm the two samples coincide.
summary(rdrobust(y = dat_st[["ever_vote_postTFA_12to5"]],
                 x = dat_st[["zscore"]],
                 fuzzy = dat_st[["matriculated4"]],
                 all = TRUE))
summary(rdrobust(y = dat_st[["ever_vote_postTFA_12to5"]],
                 x = dat_st[["zscore"]],
                 all = TRUE))

```


\clearpage

## Match Strategy 2: Survey and Application Variables

2) Use application-based birth year (including graduation year proxies for 2010-2013 cohorts) to find matches within states. Use only survey-based state for the post-treatment results, but application-based state for the pre-treatment results, and restrict to the sample of survey respondents for comparability with Match 1. This is Match 2 reported in Figure 2 in the paper (but see below for code to generate coefficient plots) and the figure reported in Section A7 of the appendix.

```{r load2}
# Work inside the combined survey-and-application match folder
setwd(here("Survey and App Match"))

# Open both deduplication variants with the custom loader, as plain data frames
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="4-13", em=TRUE) %>%
  as.data.frame() # survey state
dat_hybrid <- open_data(match=1, dedup=10, csv=TRUE,
                        rm_na_state=TRUE, date_txt="4-13", em=TRUE) %>%
  as.data.frame() # application-based state

# Flag dataset with survey responses (presumably supplies tfa_dat_flag)
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

# Attach the survey-start flag and keep only survey takers
dat_hybrid <- dat_hybrid %>%
  mutate(personid = as.character(personid)) %>%
  left_join(select(tfa_dat_flag, personid, started)) %>%
  filter(started == 1)
```

```{r s2_p1, fig.dim=c(5,5)}
# Match 2 post-treatment RD figure, 2007-2015 application cohorts
dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="ever_vote_postTFA_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

# Full rdrobust output for reference: fuzzy RD first, then sharp RD
# NOTE(review): these calls use every row of dat_st, while the figure is
# restricted to appyear 2007-2015 -- confirm the two samples coincide.
summary(rdrobust(y = dat_st[["ever_vote_postTFA_12to5"]],
                 x = dat_st[["zscore"]],
                 fuzzy = dat_st$matriculated4,
                 all = TRUE))
summary(rdrobust(y = dat_st[["ever_vote_postTFA_12to5"]],
                 x = dat_st[["zscore"]],
                 all = TRUE))
```


\clearpage

## Strategies 1 and 2: Pre-Treatment Results

This section keeps the Strategy 1 and Strategy 2 birth-year matches, but now analyzes pre-application turnout using the hybrid application-based state matches. These results are reported in Figure 2 in the main text and in Section A7 of the Appendix.

```{r loadpre_1}
# Survey birth year with application-based state (dedup 10), as a data frame
setwd(here("Survey_Only"))
dat_hybrid_1 <- open_data(match=1, dedup=10, csv=TRUE,
                          rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # hybrid of 3 application sources
```

```{r loadpre_2}
# Application birth year, application state: this chunk does not open a new
# file; it works on the dat_hybrid object already in the workspace
setwd(here("Survey and App Match"))

# Flag dataset with survey responses (presumably supplies tfa_dat_flag)
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

# Attach the survey-start flag and keep only survey takers
# NOTE(review): if dat_hybrid already carries `started` from an earlier chunk,
# this join matches on both shared columns and adds nothing -- confirm.
dat_hybrid <- dat_hybrid %>%
  mutate(personid = as.character(personid)) %>%
  left_join(select(tfa_dat_flag, personid, started)) %>%
  filter(started == 1)
```


```{r pre_p1, fig.dim=c(5,5)}
# Match 1 pre-application RD figure, 2007-2015 application cohorts
dat_hybrid_1 %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="ever_vote_preAPP_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

# Full rdrobust output for reference: fuzzy RD first, then sharp RD
# NOTE(review): these calls use every row of dat_hybrid_1, while the figure is
# restricted to appyear 2007-2015 -- confirm the two samples coincide.
summary(rdrobust(y = dat_hybrid_1[["ever_vote_preAPP_12to5"]],
                 x = dat_hybrid_1[["zscore"]],
                 fuzzy = dat_hybrid_1[["matriculated4"]],
                 all = TRUE))
summary(rdrobust(y = dat_hybrid_1[["ever_vote_preAPP_12to5"]],
                 x = dat_hybrid_1[["zscore"]],
                 all = TRUE))
```



```{r pre_p2, fig.dim=c(5,5)}
# Match 2 pre-application RD figure, 2007-2015 application cohorts
dat_hybrid %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="ever_vote_preAPP_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")
  #d="Pre-App Results \n (2012-15 elex, 13-15 cohorts)",
  #t="Hybrid App State, App YoB", y_lim=c(0,0.7), v="1x2")

# Full rdrobust output for reference: fuzzy RD first, then sharp RD
# NOTE(review): these calls use every row of dat_hybrid, while the figure is
# restricted to appyear 2007-2015 -- confirm the two samples coincide.
summary(rdrobust(y = dat_hybrid[["ever_vote_preAPP_12to5"]],
                 x = dat_hybrid[["zscore"]],
                 fuzzy = dat_hybrid[["matriculated4"]],
                 all = TRUE))
summary(rdrobust(y = dat_hybrid[["ever_vote_preAPP_12to5"]],
                 x = dat_hybrid[["zscore"]],
                 all = TRUE))

```




\clearpage

## Coefficient Plot ##

This code generates Figure 2 in the main text, the key results plot. It calculates the effect estimate and standard error for Match 1 and Match 2 posttreatment results, then the Match 1 and Match 2 pretreatment results. It then reshapes the results and presents them in a coefficient plot.

```{r m1_post}
# Match 1 post-treatment data: survey birth year, survey state (dedup 6)
setwd(here("Survey_Only"))
load(here("Survey_Only", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # survey state

# CACE and ITT estimates for the 2007-2015 cohorts, kept for the coefficient plot
m1_post <- dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  calc_rd(dv="ever_vote_postTFA_12to5", v="1x2")
```

```{r m2_post}
# Match 2 data: both deduplication variants, as plain data frames
setwd(here("Survey and App Match"))

dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="4-13", em=TRUE) %>%
  as.data.frame() # survey state
dat_hybrid <- open_data(match=1, dedup=10, csv=TRUE,
                        rm_na_state=TRUE, date_txt="4-13", em=TRUE) %>%
  as.data.frame() # application state

# Restrict the application-state data to survey takers
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")
dat_hybrid <- dat_hybrid %>%
  mutate(personid = as.character(personid)) %>%
  left_join(select(tfa_dat_flag, personid, started)) %>%
  filter(started == 1)

# Post-treatment CACE and ITT estimates, kept for the coefficient plot
m2_post <- dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  calc_rd(nbins=50, dv="ever_vote_postTFA_12to5", v="1x2")
```

```{r m1_pre}
# Match 1 pre-treatment data: survey birth year, application state (dedup 10)
setwd(here("Survey_Only"))
dat_hybrid_1 <- open_data(match=1, dedup=10, csv=TRUE,
                          rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # hybrid of 3 application sources

# Pre-application CACE and ITT estimates, kept for the coefficient plot
m1_pre <- dat_hybrid_1 %>%
  filter(appyear %in% c(2007:2015)) %>%
  calc_rd(nbins=50, dv="ever_vote_preAPP_12to5", v="1x2")
```

```{r m2_pre}
# Match 2 pre-treatment sample: dat_hybrid restricted to survey takers
# NOTE(review): if dat_hybrid already carries `started` from an earlier chunk,
# this join matches on both shared columns and adds nothing -- confirm.
dat_hybrid <- dat_hybrid %>%
  mutate(personid = as.character(personid)) %>%
  left_join(select(tfa_dat_flag, personid, started)) %>%
  filter(started == 1)

# Pre-application CACE and ITT estimates, kept for the coefficient plot
m2_pre <- dat_hybrid %>%
  filter(appyear %in% c(2007:2015)) %>%
  calc_rd(nbins=50, dv="ever_vote_preAPP_12to5", v="1x2")
```


```{r coefplot, fig.dim=c(7,10)}
# Tag each table of estimates with its match strategy and pre/post timing
m1_post <- mutate(m1_post, match = 1, type = "post")
m2_post <- mutate(m2_post, match = 2, type = "post")
m1_pre  <- mutate(m1_pre,  match = 1, type = "pre")
m2_pre  <- mutate(m2_pre,  match = 2, type = "pre")

# Stack the four tables; order the timing factor so "pre" is drawn before "post"
effs <- bind_rows(m1_post, m2_post, m1_pre, m2_pre) %>%
  mutate(type = factor(type, levels=c("pre", "post"), ordered=TRUE))

# Text labels (estimate over standard error) for the post-treatment points;
# row 1 of each table is the CACE, row 2 the ITT
dat_text <- data.frame(
  label = c(
    paste0(m1_post$est[1], "\n (",  m1_post$se[1], ")"),
    paste0(m2_post$est[1], "\n (",  m2_post$se[1], ")"),
    paste0(m1_post$est[2], "\n (",  m1_post$se[2], ")"),
    paste0(m2_post$est[2], "\n (",  m2_post$se[2], ")")
  ),
  model = c("CACE", "CACE", "ITT", "ITT"),
  x = c(1.7, 2.3, 1.7, 2.3),
  y = c(m1_post$est[1], m2_post$est[1], m1_post$est[2], m2_post$est[2]),
  match = c(1, 2, 1, 2)
)

# Coefficient plot: one facet per model, points with 1.96*se intervals,
# match strategies dodged side by side within each timing category
ggplot(effs) +
  geom_point(
    aes(x=type, y=est, group=factor(match), color=factor(match)),
    position = position_dodge(width = 0.5)
  ) +
  geom_errorbar(
    aes(x=type,
        ymin=est - 1.96*se,
        ymax=est + 1.96*se,
        group=factor(match), color=factor(match)),
    width=.0,
    position = position_dodge(width = 0.5)
  ) +
  facet_wrap(~model, scales="free", ncol=1) +
  theme_bw() +
  xlab(" ") +
  ylab("Estimate") +
  geom_hline(yintercept=0, linetype=2) +
  scale_color_manual(name="Match Strategy",
                     values=c("chartreuse4", "dodgerblue")) +
  scale_x_discrete(labels=c("Pre-Treatment", "Post-Treatment")) +
  theme(text=element_text(size=15)) +
  geom_text(
    data=dat_text,
    aes(x = x, y = y,
        label = label,
        group=factor(match),
        color=factor(match))
  )
```




## Alternative Outcome: Changed Address

This section tests whether applicants admitted to TFA moved states at a higher rate than applicants who were not admitted. This analysis is referenced in the fourth paragraph of the Materials and Methods section of the main text.

```{r load1_rep, results='hide'}
# Match 1 post-treatment data, with an indicator for a change of state:
# `moved` is NA when state_ca is blank or state_st is missing, 0 when the two
# states agree and 1 when they differ (state_ca is the application-time
# address state and state_st the survey state, per the original comment)
setwd(here("Survey_Only"))
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() %>% # survey state
  mutate(moved = case_when(state_ca==""~NA_real_,
                           is.na(state_st)~NA_real_,
                           state_ca==state_st~0,
                           state_ca!=state_st~1))

# Share of movers within each matriculation status (column proportions)
table(dat_st$moved, dat_st$matriculated4) %>%
  prop.table(2)
```


```{r alt_p1}
# RD figure with `moved` as the outcome: is moving discontinuous at the cutoff?
dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="moved",
          d="State Changed between App and Survey",
          t="Respondents with App and Survey State",
          y_lim=c(0,0.9), v="1x2")


```


## Heterogeneity

This section tests whether the estimates reported in the text are heterogeneous across demographic groups. These analyses are presented in section A8 of the appendix.

```{r load_h}
# Match 1 post-treatment data (survey state, dedup 6) as a plain data frame
setwd(here("Survey_Only"))
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # survey state


```


```{r set1_h}
#load survey data
survey <- haven::read_dta(here("Data", "tfa_data.dta"))
#code demographic categories 
# white: 1 for appethnicity "EUROPEAN", NA for a blank value, 0 otherwise
# NOTE(review): a missing (NA) appethnicity falls through to the T~0 branch
# and is coded non-white -- confirm that is intended.
survey <- survey %>%
  mutate(white = case_when(appethnicity=="EUROPEAN"~1,
                           appethnicity==""~NA_real_,
                           T~0))
# birthyear/young are computed here but not used in the estimates below
# (the "Young"/"Older" levels are commented out in the plotting chunk)
survey$birthyear <- year(dmy(survey$dateofbirth))
survey$young <- (survey$appyear - survey$birthyear)<=25
# personid as character so it can serve as the join key with `survey`
dat_st$personid <- as.character(dat_st$personid)

#for each demographic category of interest,
  # create a sample subset for that category
  # calculate the treatment effect 
  # add it to a dataframe of effect estimates, along with a label for the group
# Every rdrobust() call below omits `fuzzy`, so despite the object names
# (resCACE, bCACE, seCACE) these are sharp-RD (ITT) estimates.
# The plotting chunk reads columns V1, tau.bc and se.rb from `res`; those
# names presumably arise from the names carried by bCACE/seCACE through
# c(), rbind() and as.data.frame() -- verify before restructuring this code.

# Full sample
dat <- dat_st %>%
  filter(appyear %in% c(2007:2015)) 
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- c("Full Sample", bCACE, seCACE)
  

# White applicants
dat <- left_join(dat_st, select(survey, personid, white), by="personid") %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(white==1)
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("White", bCACE, seCACE))
  
# Non-white applicants
dat <- left_join(dat_st, select(survey, personid, white), by="personid") %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(white==0)
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("Non-white", bCACE, seCACE))

# Female applicants
dat <- left_join(dat_st, select(survey, personid, gender), by="personid") %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(gender=="FEMALE")
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("Female", bCACE, seCACE))

# Male applicants
dat <- left_join(dat_st, select(survey, personid, gender), by="personid") %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(gender=="MALE")
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("Male", bCACE, seCACE))

# Convert the stacked character matrix to a data frame for the next chunk
res <- as.data.frame(res)
```

```{r set2_h}

# continue from last chunk, now with region and Pell grant
# Each block subsets the 2007-2015 cohorts to one group, runs a sharp RD
# (no `fuzzy` argument, so an ITT despite the resCACE/bCACE names) and appends
# a row (label, estimate, robust se) to `res`.

# code geographic regions 
# The southern list previously held "VA" twice and lacked "WV", which left
# West Virginia rows with a missing region; "WV" now takes the duplicate's place.
west <- c("MT", "WA", "ID", "OR", "WY", "CA", "CO", "AK", 
          "NM", "HI", "AZ", "UT", "NV")
south <- c("DE", "KY", "MD", "TN", "DC", "AL", "VA", "MS",
           "WV", "NC", "SC", "AR", "GA", "LA", "FL", "OK", "TX")
midwest <- c("OH", "MN", "IN", "IA", "IL", "MO", "MI", "ND", 
             "WI", "SD", "NE", "KS")
northeast <- c("ME", "NY", "NH", "NJ", "VT", "PA", "MA", 
               "RI", "CT")
# Region is assigned from state_ca; states outside the four lists get NA
dat_st <- dat_st %>%
  mutate(region = case_when(state_ca %in% west~"west",
                            state_ca %in% south~"south", 
                            state_ca %in% midwest~"midwest",
                            state_ca %in% northeast~"northeast"))

# West
dat <- dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(region=="west")
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("West", bCACE, seCACE))
res <- as.data.frame(res)

# South
dat <- dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(region=="south")
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("South", bCACE, seCACE))
res <- as.data.frame(res)

# Midwest
dat <- dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(region=="midwest")
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("Midwest", bCACE, seCACE))
res <- as.data.frame(res)

# Northeast
dat <- dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  filter(region=="northeast")
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("Northeast", bCACE, seCACE))
res <- as.data.frame(res)

# No Pell grant: receivedpellgrants of "N" or "No"; other answers that are
# not listed in either branch become NA and are dropped by the filter
dat <- left_join(dat_st, select(survey, personid, receivedpellgrants), by="personid") %>%
  filter(appyear %in% c(2007:2015)) %>%
  mutate(pell = case_when(receivedpellgrants %in% c("N", "No")~0,
                          receivedpellgrants %in% c("Y", "Yes, I received a partial Pell Grant", "Yes, I received the maximum Pell Grant")~1)) %>%
  filter(pell==0)
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("No Pell", bCACE, seCACE))

# Pell grant recipients (partial or maximum)
dat <- left_join(dat_st, select(survey, personid, receivedpellgrants), by="personid") %>%
  filter(appyear %in% c(2007:2015)) %>%
  mutate(pell = case_when(receivedpellgrants %in% c("N", "No")~0,
                          receivedpellgrants %in% c("Y", "Yes, I received a partial Pell Grant", "Yes, I received the maximum Pell Grant")~1)) %>%
  filter(pell==1)
resCACE <- rdrobust(x=dat$zscore, y=dat$ever_vote_postTFA_12to5, all=TRUE)
  bCACE  <- resCACE$Estimate[1,"tau.bc"] %>% round(3)
  seCACE <- resCACE$Estimate[1,"se.rb"] %>% round(3)
res <- rbind(res, c("Pell Grant", bCACE, seCACE))

```

```{r}

# Prepare the stacked heterogeneity results for plotting: order the group
# labels for the x-axis and convert the estimate columns from text to numbers
res$V1 <- factor(res$V1, levels=c("Full Sample", "White", "Non-white", "Male", "Female", #"Young", "Older", 
                                  "West", "South", "Midwest", "Northeast", "Pell Grant", "No Pell"), ordered=TRUE)
res$tau.bc <- as.numeric(res$tau.bc)
res$se.rb <- as.numeric(res$se.rb)

#export plot of heterogeneous effects
# The plot is printed explicitly so it is written to the PDF device even when
# this code runs where top-level values are not auto-printed (e.g. source()).
pdf(here("Output", "heterogeneity.pdf"), width=11, height=5)
print(
  ggplot(data=res) + 
    geom_point(aes(x=V1, y=tau.bc)) + 
    geom_errorbar(aes(x=V1, ymin=tau.bc - se.rb*1.96,
                      ymax = tau.bc + se.rb*1.96),
                  width=.5) +
    theme_bw() +
    xlab("Group") + ylab("ITT within Group") +
    theme(text=element_text(size=16)) + 
    geom_hline(yintercept=0, linetype=2)
)
dev.off()

```


## Alternate outcome: registration and prop. voted

This section calculates the effect of the treatment on two alternative outcomes: whether an applicant was registered to vote, and the proportion of elections in which the applicant voted after TFA participation (or the equivalent 2-year window for non-matriculants). These results are reported in section A9 of the appendix.

```{r load1c}
# Match 1 (survey birth year) data, both deduplication variants, as data frames
setwd(here("Survey_Only"))
load(here("Survey_Only", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # survey state
dat_hybrid <- open_data(match=1, dedup=10, csv=TRUE,
                        rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # hybrid of 3 application sources
```

```{r s1_reg, fig.dim=c(5,5)}
# Match 1 RD figure for being registered to vote (2007-2015 cohorts).
# The outcome is registration, so the y-axis is labelled as such rather than
# as turnout.
plot_rd(filter(dat_st, appyear %in% c(2007:2015)), nbins=50, dv="registered", 
        y_lim=c(0,0.9),  v="1x2") +
        ylab("Voter Registration")
```

```{r s1_prop, fig.dim=c(5,5)}
# Match 1 RD figure for the proportion of post-TFA elections voted in
# (2007-2015 cohorts)
dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="prop_vote_postTFA_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")
```


```{r load2c}
#load match strategy 2 data, both deduplication strategies 
setwd(here("Survey and App Match"))

dat_st <- open_data(match=1, dedup=6, csv=TRUE, 
                    rm_na_state=TRUE, date_txt="4-13", em=TRUE) #Survey state
# Assign to dat_hybrid (the name was previously misspelled, so the freshly
# opened data were never used and a stale dat_hybrid from an earlier chunk
# was converted and filtered instead)
dat_hybrid <- open_data(match=1, dedup=10, csv=TRUE, 
                    rm_na_state=TRUE, date_txt="4-13", em=TRUE) #app hybrid state
dat_st <- as.data.frame(dat_st)
dat_hybrid <- as.data.frame(dat_hybrid)

#restrict to survey respondents
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")
# personid as character so it can serve as the join key with the flag data
dat_hybrid$personid <- as.character(dat_hybrid$personid)
dat_hybrid <- left_join(dat_hybrid, select(tfa_dat_flag, personid, started))
dat_hybrid %<>% filter(started==1)
```

```{r s2_reg, fig.dim=c(5,5)}
# Match 2 RD figure for being registered to vote (2007-2015 cohorts).
# The outcome is registration, so the y-axis is labelled as such rather than
# as turnout.
plot_rd(filter(dat_st, appyear %in% c(2007:2015)), nbins=50, dv="registered", 
        y_lim=c(0,0.9),  v="1x2")  +
        ylab("Voter Registration")
```

```{r s2_prop, fig.dim=c(5,5)}
# Match 2 RD figure for the proportion of post-TFA elections voted in
# (2007-2015 cohorts)
dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="prop_vote_postTFA_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

```


## Alternate outcome: dosage response

This section calculates treatment effects for voting records measured at time periods before TFA matriculants had completed the full program, or an equivalent period for non-matriculants. This analysis is reported in section A9 of the appendix.

```{r load1d}
# Match 1 post-treatment data: survey birth year, survey-state deduplication
setwd(here("Survey_Only"))

load(here("Survey_Only", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # survey state

```

```{r s1_dosage12, fig.dim=c(5,5)}
# 2012 cohort, presidential elections in the 2012-2015 window:
# an election a few months after beginning the program
dat_st %>%
  filter(appyear %in% c(2012)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_pres_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")
  #d="Post-TFA Results (2012-15 elex, 07-12 cohorts)",
  #t="Survey State and YoB", y_lim=c(0,0.7), v="1x2")
```

```{r s1_dosage11, fig.dim=c(5,5)}

# 2011 cohort, presidential elections in the 2012-2015 window (i.e. 2012):
# an election the fall after beginning the program
dat_st %>%
  filter(appyear %in% c(2011)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_pres_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

```


```{r s1_dosage14, fig.dim=c(5,5)}
# 2014 cohort, midterm elections in the 2012-2015 window (i.e. 2014):
# an election months after beginning the program
dat_st %>%
  filter(appyear %in% c(2014)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_mid_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

```

```{r s1_dosage13, fig.dim=c(5,5)}
# 2013 cohort, midterm elections in the 2012-2015 window (i.e. 2014):
# an election the fall after beginning the program
dat_st %>%
  filter(appyear %in% c(2013)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_mid_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

```




```{r load2d}
# Repeat the dosage analysis with the Match 2 data (survey-and-application
# match folder), survey-state deduplication
setwd(here("Survey and App Match"))

dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="4-13", em=TRUE) %>%
  as.data.frame() # survey state

```

```{r s2_dosage12, fig.dim=c(5,5)}
# Match 2: 2012 cohort, presidential elections in the 2012-2015 window
dat_st %>%
  filter(appyear %in% c(2012)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_pres_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")
```

```{r s2_dosage11, fig.dim=c(5,5)}
# Match 2: 2011 cohort, presidential elections in the 2012-2015 window
dat_st %>%
  filter(appyear %in% c(2011)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_pres_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")
```


```{r s2_dosage14, fig.dim=c(5,5)}
# Match 2: 2014 cohort, midterm elections in the 2012-2015 window
dat_st %>%
  filter(appyear %in% c(2014)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_mid_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")

```

```{r s2_dosage13, fig.dim=c(5,5)}
# Match 2: 2013 cohort, midterm elections in the 2012-2015 window
dat_st %>%
  filter(appyear %in% c(2013)) %>%
  plot_rd(nbins=50, dv="ever_vote_postAPP_mid_12to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")
```

## Alternate outcome: include 2008 and 2010

This analysis adds turnout in the 2008 and 2010 elections to the outcome variables. The main text reports only elections 2012 and later, because a 2016 voter file snapshot has more accurate representations of these later elections, especially for a mobile sample like ours. This analysis is reported in section A9 of the appendix.

```{r load08}
# Match 1 post-treatment data (survey state, dedup 6) as a plain data frame
setwd(here("Survey_Only"))

load(here("Survey_Only", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))
dat_st <- open_data(match=1, dedup=6, csv=TRUE,
                    rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # survey state
```

```{r s1_p1_0815, fig.dim=c(5,5)}
# Match 1 post-TFA turnout counting elections from 2008 onward
dat_st %>%
  filter(appyear %in% c(2007:2015)) %>%
  plot_rd(nbins=50, dv="ever_vote_postTFA_08to5", y_lim=c(0,0.7), v="1x2") +
  ylab("Voter Turnout")
```

```{r load082}
# Pre-treatment data from the Survey_Only folder (the Match 1 source),
# application-state deduplication (dedup 10), as a plain data frame
setwd(here("Survey_Only"))
dat_hybrid <- open_data(match=1, dedup=10, csv=TRUE,
                        rm_na_state=TRUE, date_txt="3-17") %>%
  as.data.frame() # hybrid of 3 application sources
```

```{r pre_p1_0815, fig.dim=c(5,5)}
# Pre-treatment RD plot, application years 2007-2015.
# Outcome column: ever_vote_preAPP_08to5 (turnout measure that starts in 2008).
dat_hybrid %>%
  filter(appyear %in% 2007:2015) %>%
  plot_rd(
    dv = "ever_vote_preAPP_08to5",
    nbins = 50,
    y_lim = c(0, 0.7),
    v = "1x2"
  ) +
  ylab("Voter Turnout")
```

```{r load083}
# Match 2 ("Survey and App Match" folder): post-treatment data.
setwd(here("Survey and App Match"))

# dedup = 6 is labelled "Survey" by the original author; the result is
# converted with as.data.frame() before being stored.
dat_st <- open_data(
  match = 1, dedup = 6, csv = TRUE,
  rm_na_state = TRUE, date_txt = "4-13", em = TRUE
) %>%
  as.data.frame()


```

```{r s2_p1_0815, fig.dim=c(5,5)}
# Match 2 RD plot, application years 2007-2015.
# Outcome column: ever_vote_postTFA_08to5 (turnout measure that starts in 2008).
dat_st %>%
  filter(appyear %in% 2007:2015) %>%
  plot_rd(
    dv = "ever_vote_postTFA_08to5",
    nbins = 50,
    y_lim = c(0, 0.7),
    v = "1x2"
  ) +
  ylab("Voter Turnout")

```

```{r load084}
# Match 2 ("Survey and App Match" folder): pre-treatment data,
# restricted to survey takers.
setwd(here("Survey and App Match"))

# dedup = 10 is labelled "Application-based" by the original author.
dat_hybrid <- open_data(
  match = 1, dedup = 10, csv = TRUE,
  rm_na_state = TRUE, date_txt = "4-13", em = TRUE
)

# Flags file with survey responses; tfa_dat_flag is read from the workspace
# after this load (path is relative to the folder set above).
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

# personid is cast to character before joining.
dat_hybrid$personid <- as.character(dat_hybrid$personid)

# Attach the `started` flag, then keep only rows with started == 1.
dat_hybrid <- dat_hybrid %>%
  left_join(tfa_dat_flag %>% select(personid, started)) %>%
  filter(started == 1)
```

```{r pre_p2_0815, fig.dim=c(5,5)}
# Pre-treatment RD plot, application years 2007-2015.
# Outcome column: ever_vote_preAPP_08to5 (turnout measure that starts in 2008).
dat_hybrid %>%
  filter(appyear %in% 2007:2015) %>%
  plot_rd(
    dv = "ever_vote_preAPP_08to5",
    nbins = 50,
    y_lim = c(0, 0.7),
    v = "1x2"
  ) +
  ylab("Voter Turnout")
```


```{r load10}
# Match 1 (Survey_Only folder): post-treatment data, reloaded for the
# outcomes that start in 2010.
setwd(here("Survey_Only"))

# Restore the objects stored in the flags .RData file into the workspace.
load(here("Survey_Only", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))

# dedup = 6 is labelled "Survey" by the original author; the result is
# converted with as.data.frame() before being stored.
dat_st <- open_data(
  match = 1, dedup = 6, csv = TRUE,
  rm_na_state = TRUE, date_txt = "3-17"
) %>%
  as.data.frame()
```

```{r s1_p1_1015, fig.dim=c(5,5)}
# Match 1 RD plot, application years 2007-2015.
# Outcome column: ever_vote_postTFA_10to5 (turnout measure that starts in 2010).
dat_st %>%
  filter(appyear %in% 2007:2015) %>%
  plot_rd(
    dv = "ever_vote_postTFA_10to5",
    nbins = 50,
    y_lim = c(0, 0.7),
    v = "1x2"
  ) +
  ylab("Voter Turnout")
```

```{r load102}
# Match 1 (Survey_Only folder): pre-treatment data.
# (The folder is the one used for match 1 elsewhere in this file.)
setwd(here("Survey_Only"))

# dedup = 10 is labelled "Hybrid of 3 app sources" by the original author;
# the result is converted with as.data.frame() before being stored.
dat_hybrid <- open_data(
  match = 1, dedup = 10, csv = TRUE,
  rm_na_state = TRUE, date_txt = "3-17"
) %>%
  as.data.frame()
```

```{r pre_p1_1015, fig.dim=c(5,5)}
# Pre-treatment RD plot, application years 2007-2015.
# Outcome column: ever_vote_preAPP_10to5 (turnout measure that starts in 2010).
dat_hybrid %>%
  filter(appyear %in% 2007:2015) %>%
  plot_rd(
    dv = "ever_vote_preAPP_10to5",
    nbins = 50,
    y_lim = c(0, 0.7),
    v = "1x2"
  ) +
  ylab("Voter Turnout")
```

```{r load103}
# Match 2 ("Survey and App Match" folder): post-treatment data.
setwd(here("Survey and App Match"))

# dedup = 6 is labelled "Survey" by the original author; the result is
# converted with as.data.frame() before being stored.
dat_st <- open_data(
  match = 1, dedup = 6, csv = TRUE,
  rm_na_state = TRUE, date_txt = "4-13", em = TRUE
) %>%
  as.data.frame()


```

```{r s2_p1_1015, fig.dim=c(5,5)}
# Match 2 RD plot, application years 2007-2015.
# Outcome column: ever_vote_postTFA_10to5 (turnout measure that starts in 2010).
dat_st %>%
  filter(appyear %in% 2007:2015) %>%
  plot_rd(
    dv = "ever_vote_postTFA_10to5",
    nbins = 50,
    y_lim = c(0, 0.7),
    v = "1x2"
  ) +
  ylab("Voter Turnout")

```

```{r load010}
# Match 2 ("Survey and App Match" folder): pre-treatment data,
# restricted to survey takers.
setwd(here("Survey and App Match"))

# dedup = 10 is labelled "Application-based" by the original author.
dat_hybrid <- open_data(
  match = 1, dedup = 10, csv = TRUE,
  rm_na_state = TRUE, date_txt = "4-13", em = TRUE
)

# Flags file with survey responses; tfa_dat_flag is read from the workspace
# after this load (path is relative to the folder set above).
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

# personid is cast to character before joining.
dat_hybrid$personid <- as.character(dat_hybrid$personid)

# Attach the `started` flag, then keep only rows with started == 1.
dat_hybrid <- dat_hybrid %>%
  left_join(tfa_dat_flag %>% select(personid, started)) %>%
  filter(started == 1)
```

```{r pre_p2_1015, fig.dim=c(5,5)}
# Pre-treatment RD plot, application years 2007-2015.
# Outcome column: ever_vote_preAPP_10to5 (turnout measure that starts in 2010).
dat_hybrid %>%
  filter(appyear %in% 2007:2015) %>%
  plot_rd(
    dv = "ever_vote_preAPP_10to5",
    nbins = 50,
    y_lim = c(0, 0.7),
    v = "1x2"
  ) +
  ylab("Voter Turnout")
```


## Completed Program (replaces matriculated)

This section presents results using an alternative measure of "compliance" in the fuzzy regression discontinuity setup--the application score instruments for whether an applicant completed the program, rather than whether the applicant matriculated in the program. These results are presented in section A9 of the appendix.

```{r m1_post_complete}
# Match 1 (Survey_Only folder): post-treatment estimates with program
# completion as the compliance measure.
setwd(here("Survey_Only"))

# Load the flags file once. It is the same file whether addressed through
# here() or relative to the folder set above, so a second load is unnecessary.
# tfa_dat_flag is read from the workspace after this load.
load(here("Survey_Only", "Temp_Data", "tfa_to_state_ALL_flags_v2.RData"))

# dedup = 6 is labelled "Survey" by the original author.
dat_st <- open_data(match = 1, dedup = 6, csv = TRUE,
                    rm_na_state = TRUE, date_txt = "3-17")
dat_st <- as.data.frame(dat_st)

# Attach each applicant's disposition step; personid is cast to character
# before the join.
dat_st$personid <- as.character(dat_st$personid)
dat_st <- left_join(dat_st, tfa_dat_flag %>% select(personid, dispositionstep))

# complete is TRUE only when dispositionstep is "ALUM COMPLETED"
# (NA where dispositionstep is missing).
dat_st$complete <- dat_st$dispositionstep == "ALUM COMPLETED"

# `complete` is passed as mat4 in place of the default matriculation column;
# sample is application years 2007-2011.
m1_post <- calc_rd(filter(dat_st, appyear %in% c(2007:2011)),
                   dv = "ever_vote_postTFA_12to5", v = "1x2", mat4 = "complete")
```


```{r m2_post_complete}
# Match 2 ("Survey and App Match" folder): post-treatment estimates with
# program completion as the compliance measure.
setwd(here("Survey and App Match"))

# dedup = 6 is labelled "Survey" by the original author; dedup = 10 is the
# file used for the pre-treatment analyses. Both are stored as data.frames.
dat_st <- open_data(
  match = 1, dedup = 6, csv = TRUE,
  rm_na_state = TRUE, date_txt = "4-13", em = TRUE
)
dat_hybrid <- open_data(
  match = 1, dedup = 10, csv = TRUE,
  rm_na_state = TRUE, date_txt = "4-13", em = TRUE
)
dat_st <- dat_st %>% as.data.frame()
dat_hybrid <- dat_hybrid %>% as.data.frame()

# Flags file; tfa_dat_flag is read from the workspace after this load.
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

# Pre-treatment file: attach `started` and keep rows with started == 1.
dat_hybrid$personid <- as.character(dat_hybrid$personid)
dat_hybrid <- dat_hybrid %>%
  left_join(tfa_dat_flag %>% select(personid, started)) %>%
  filter(started == 1)

# Post-treatment file: attach dispositionstep and flag program completers
# (TRUE only when dispositionstep is "ALUM COMPLETED").
dat_st$personid <- as.character(dat_st$personid)
dat_st <- dat_st %>%
  left_join(select(tfa_dat_flag, personid, dispositionstep))
dat_st$complete <- dat_st$dispositionstep == "ALUM COMPLETED"

# `complete` is passed as mat4; sample is application years 2007-2011.
m2_post <- dat_st %>%
  filter(appyear %in% 2007:2011) %>%
  calc_rd(
    nbins = 50,
    dv = "ever_vote_postTFA_12to5",
    v = "1x2",
    mat4 = "complete"
  )
```

```{r m1_pre_completed, eval=FALSE}
# Match 1 (Survey_Only folder): pre-treatment estimates with program
# completion as the compliance measure.
# NOTE(review): tfa_dat_flag is not loaded in this chunk; it must already be
# in the workspace from an earlier chunk.
setwd(here("Survey_Only"))

# dedup = 10 is labelled "Hybrid of 3 app sources" by the original author.
dat_hybrid_1 <- open_data(match = 1, dedup = 10, csv = TRUE,
                          rm_na_state = TRUE, date_txt = "3-17")
dat_hybrid_1 <- as.data.frame(dat_hybrid_1)

# Attach survey-start and disposition flags, keep rows with started == 1,
# and flag completers (TRUE only when dispositionstep is "ALUM COMPLETED").
dat_hybrid_1$personid <- as.character(dat_hybrid_1$personid)
dat_hybrid_1 <- left_join(dat_hybrid_1,
                          select(tfa_dat_flag, personid, started, dispositionstep))
dat_hybrid_1 %<>% filter(started == 1)
dat_hybrid_1$complete <- dat_hybrid_1$dispositionstep == "ALUM COMPLETED"

# Direct rdrobust() call on the full joined sample (no appyear filter),
# with `complete` as the fuzzy (treatment take-up) variable.
est <- rdrobust(y = dat_hybrid_1$ever_vote_preAPP_12to5,
                x = dat_hybrid_1$zscore,
                fuzzy = dat_hybrid_1$complete,
                all = TRUE)

# Use `complete` as the compliance variable, consistent with the rdrobust()
# call above and with the match 2 pre-treatment estimate (m2_pre); the
# previous value "matriculated4" left the `complete` column unused here.
m1_pre <- calc_rd(filter(dat_hybrid_1, appyear %in% c(2007:2015)), nbins = 50,
                  dv = "ever_vote_preAPP_12to5", v = "1x2", mat4 = "complete")
```

```{r m2_pre_completed, eval=FALSE}
# Match 2 pre-treatment estimate (outcome ever_vote_preAPP_12to5) with
# program completion as the compliance measure. Reuses dat_hybrid from the
# workspace.
# NOTE(review): the relative path below depends on the current working
# directory and this chunk does not call setwd() -- confirm which folder's
# flags file is intended.
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

# Attach survey-start and disposition flags; keep rows with started == 1.
dat_hybrid$personid <- as.character(dat_hybrid$personid)
dat_hybrid <- dat_hybrid %>%
  left_join(tfa_dat_flag %>% select(personid, started, dispositionstep)) %>%
  filter(started == 1)

# complete is TRUE only when dispositionstep is "ALUM COMPLETED".
dat_hybrid$complete <- dat_hybrid$dispositionstep == "ALUM COMPLETED"

# `complete` is passed as mat4; sample is application years 2007-2015.
m2_pre <- dat_hybrid %>%
  filter(appyear %in% 2007:2015) %>%
  calc_rd(
    nbins = 50,
    dv = "ever_vote_preAPP_12to5",
    v = "1x2",
    mat4 = "complete"
  )
```

```{r completed_program, fig.dim=c(7,5)}
# Coefficient plot of the post-treatment estimates from both match strategies.

# Tag each result set with its match strategy and period, then stack them.
m1_post$type <- "post"
m1_post$match <- 1
m2_post$type <- "post"
m2_post$match <- 2

effs <- bind_rows(m1_post, m2_post)

# Text annotations "estimate \n (se)". Rows are ordered match 1, match 2 for
# the first element of est/se (labelled CACE) and then for the second
# element (labelled ITT). x is offset to the right of each match position.
dat_text <- data.frame(
  label = paste0(
    c(m1_post$est[1], m2_post$est[1], m1_post$est[2], m2_post$est[2]),
    "\n (",
    c(m1_post$se[1], m2_post$se[1], m1_post$se[2], m2_post$se[2]),
    ")"
  ),
  model = rep(c("CACE", "ITT"), each = 2),
  x = rep(c(1.3, 2.3), times = 2),
  y = c(m1_post$est[1], m2_post$est[1], m1_post$est[2], m2_post$est[2]),
  match = rep(c(1, 2), times = 2)
)

# Only the CACE rows are plotted: point estimate, est +/- 1.96 * se interval,
# a dashed reference line at zero, and the text annotation.
ggplot(
  effs %>% filter(model == "CACE"),
  aes(
    x = factor(match), y = est,
    group = factor(match), color = factor(match)
  )
) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(
    aes(ymin = est - 1.96 * se, ymax = est + 1.96 * se),
    width = 0,
    position = position_dodge(width = 0.5)
  ) +
  #facet_wrap(~model, scales="free", ncol=1) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_text(
    data = dat_text %>% filter(model == "CACE"),
    aes(
      x = x, y = y, label = label,
      group = factor(match), color = factor(match)
    )
  ) +
  scale_color_manual(
    name = "Match Strategy",
    values = c("firebrick", "dodgerblue")
  ) +
  scale_x_discrete(labels = c("Match 1", "Match 2")) +
  theme_bw() +
  xlab(" ") +
  ylab("Estimate") +
  theme(text = element_text(size = 15))

```


## Stats for paper ##

This section calculates various statistics reported in the paper and the appendix. First, we identify which applicants completed the TFA program. We calculate the rate of completion among matriculants (reported in the paper's first footnote). 

```{r, eval=F}
# Completion statistics among matriculants, application years 2007-2011.

#navigate to combination match folder
setwd(here("Survey and App Match"))
#load data with survey responses
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

survey <- tfa_dat_flag %>% filter(appyear %in% c(2007:2011))

# Matriculant indicator with missing values treated as "not matriculated".
# Counting with sum() on NA-free logicals avoids the all-NA rows that
# df[cond, ] returns for a data.frame when cond is NA, which would inflate
# nrow().
is_matric <- !is.na(survey$matriculated4) & survey$matriculated4 == 1
n_matric <- sum(is_matric)

# Disposition steps tabulated separately from "ALUM COMPLETED" and "Y2S".
# The historical misspelling "NONCOMPLETER TERIMNATED" is kept alongside the
# corrected "NONCOMPLETER TERMINATED" so either spelling in the data matches.
# NOTE(review): the original list repeated "CONFIRMED NOSHOW" and
# "CONFIRMED NOTELIGIBLE" after "DEFERRED DECLINED"; confirm whether other
# DEFERRED codes were intended there.
other_steps <- c(
  "ACCEPTED", "ACCEPTED DECLINED", "ACCEPTED NONRESPONSE",
  "ACCEPTED NOTELIGIBLE", "ACTIVENC DIDNOTFINISH",
  "ACTIVENC ERNORETURN", "ACTIVENC RESIGNED", "ACTIVENC TERMINATED",
  "ASSIGNHOLD NORESPONSE", "ASSIGNHOLD WITHDRAWN",
  "ASSIGNMENT DECLINED", "ASSIGNMENT WITHDRAWN", "CONFIRMED",
  "CONFIRMED DECLINED", "CONFIRMED NOSHOW", "CONFIRMED NOTELIGIBLE",
  "CONFIRMED RESCINDED", "CONFIRMED WITHDRAWN", "DEFERRED",
  "DEFERRED DECLINED",
  "INSTCOMPL", "INSTCOMPL EMERGREL", "INSTREG", "INSTREG EMERGREL",
  "NONCOMPLETER NOSHOW", "NONCOMPLETER RESIGNED",
  "NONCOMPLETER TERIMNATED", "NONCOMPLETER TERMINATED",
  "PASSIVENC", "PASSIVENC DIDNOTFINISH",
  "PASSIVENC ERNORETURN", "PASSIVENC NOSHOW", "PASSIVENC RESIGNED",
  "PASSIVENC TERMINATED", "PREINST", "PREINST EMERGREL"
)

# Distribution of disposition steps among matriculants
table(survey$dispositionstep[is_matric])

# Counts of "ALUM COMPLETED" and "Y2S" relative to the number of matriculants.
# As before, these numerators are not restricted to matriculants.
sum(survey$dispositionstep %in% "ALUM COMPLETED") / n_matric
sum(survey$dispositionstep %in% "Y2S") / n_matric

# Share of matriculants whose disposition step is in other_steps
sum(is_matric & survey$dispositionstep %in% other_steps) / n_matric

```

Next, we use data from the Cooperative Election Study to calculate the rate of turnout among young teachers in the general public. This analysis is reported in section B1 of the appendix.

```{r, eval=F}
# Download the CCES file from the Harvard Dataverse and fit the
# general-public turnout models; mod2 is kept for the regression table that
# is built later in the file.
library(dataverse)
Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
# Fetch the original (non-ingested) file and read it with haven::read_dta
dat <-
  get_dataframe_by_name(
    filename  = "CCES12_Common_VV.tab",
    dataset   = "10.7910/DVN/HQEVPK",
    .f          = haven::read_dta,
    original    = TRUE,
    server    = "dataverse.harvard.edu"
  )

# teacher = 1 when industryclass is 16
# (presumably the education-sector code -- TODO confirm against the codebook)
dat$teacher <- as.numeric(dat$industryclass==16)
# voted = 1 when e2012g is one of the listed voting-method values, else 0
# NOTE(review): assumes e2012g compares equal to these strings after
# read_dta -- confirm
dat$voted <- ifelse(dat$e2012g %in% c("Absentee", "Early", "Mail", "Polling", "UnknownMethod"), 1, 0)
# Recode gender: code 1 -> "MALE", any other non-missing code -> "FEMALE"
dat$gender <- ifelse(dat$gender==1, "MALE", "FEMALE")
# Fold race code 8 into code 7
dat$race <- ifelse(dat$race==8, 7, dat$race)

# Weighted bivariate model on the full sample (printed only, not stored)
summary(lm(voted~teacher, data=dat, weights=weight_vv))
# Weighted model with controls, restricted to birthyr > 1985 and educ == 5
# (presumably young college graduates -- TODO confirm the educ coding)
mod2 <- lm_robust(voted~teacher+factor(race)+birthyr+gender, data=dat[dat$birthyr>1985&dat$educ==5,], weights=weight_vv)
# Drop the raw survey data; mod2 stays in the workspace
rm(dat)

```

We continue this analysis by loading data on the professions of TFA applicants, as reported in the survey.

```{r, eval=F}

# Match 1 (Survey_Only folder): post-treatment data.
# dedup = 6 is labelled "Survey" by the original author.
setwd(here("Survey_Only"))
dat_st <- open_data(
  match = 1, dedup = 6, csv = TRUE,
  rm_na_state = TRUE, date_txt = "3-17"
)

# Flags file (program completion), read from the combination-match folder.
setwd(here("Survey and App Match"))
load("Temp_Data/tfa_to_state_ALL_flags_v2.RData")

# personid is cast to character before the join.
# NOTE(review): no `by` is given, so the join uses every column name the two
# tables share -- confirm that is intended.
dat_st$personid <- as.character(dat_st$personid)
dat_st <- dat_st %>% left_join(tfa_dat_flag)

# complete is TRUE only when dispositionstep is "ALUM COMPLETED".
dat_st$complete <- dat_st$dispositionstep == "ALUM COMPLETED"

```

Finally, we run regression models (also reported in Section B1 of the appendix) showing the relationship between teaching and turnout in TFA applicants and the general public.


```{r, eval=F}
# Recode applicant covariates, fit the applicant model, and print a LaTeX
# table that sets it beside mod2 (fitted in an earlier chunk).

# teacher = 1 for either "Yes" answer to career1b, 0 for everything else
# (including missing). TRUE is spelled out because T can be reassigned.
dat_st <- dat_st %>%
  mutate(teacher = case_when(
    career1b %in% c("Yes, but I am currently not a full-time school teacher",
                    "Yes, I am currently a full-time school teacher") ~ 1,
    TRUE ~ 0
  ))

# race: numeric codes 1-7 built from AppEthnicity (labels are given in
# coef_rename below); unmatched values become NA.
# pell: 1 for any "Yes"/"Y" answer, 0 for "N"/"No", NA otherwise.
# yob is renamed to birthyr so both models share the covariate name.
dat_st <- dat_st %>%
  mutate(race = case_when(
    AppEthnicity == "EUROPEAN" ~ 1,
    AppEthnicity == "AFRICAN" ~ 2,
    AppEthnicity == "HISPANIC" ~ 3,
    AppEthnicity %in% c("ASIAN", "ASIANPACIFIC", "NATIVEHAWAI", "ALASKANATIVE") ~ 4,
    AppEthnicity == "NATIVE" ~ 5,
    AppEthnicity == "MULTI" ~ 6,
    AppEthnicity %in% c("OTHER", "PRSNOFCOLOR", "PRSNOFNOCLR") ~ 7
  )) %>%
  mutate(pell = case_when(
    ReceivedPellGrants %in% c("N", "No") ~ 0,
    ReceivedPellGrants %in% c("Y", "Yes, I received a partial Pell Grant",
                              "Yes, I received the maximum Pell Grant") ~ 1
  )) %>%
  rename("birthyr" = yob)

# Applicant model on rows with matriculated4.x == 0 and a non-empty Gender.
# NOTE(review): the formula uses `gender` while the row filter uses `Gender`,
# and `matriculated4.x` is expected to exist after the earlier join --
# confirm both columns are present.
mod1 <- lm_robust(
  ever_vote_2006_2016 ~ teacher + birthyr + gender + factor(race) + pell,
  data = dat_st[dat_st$matriculated4.x==0&dat_st$Gender!="",]
)

modelsummary(list("TFA Non-Admits" = mod1, "CCES" = mod2), output = "latex",
             coef_rename = c("teacher" = "Teacher/Education Sector",
                             "birthyr" = "Birth Year",
                             "genderMALE" = "Male (vs. female omitted cat.)",
                             "factor(race)2" = "Black (vs. white omitted cat.)",
                             "factor(race)3" = "Hispanic",
                             "factor(race)4" = "Asian",
                             "factor(race)5" = "Native",
                             "factor(race)6" = "Mixed",
                             "factor(race)7" = "Other",
                             "pell" = "Received Pell Grant"
                             ))

```

